package com.mao.utils;


import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;

/**
 * Command-line crawler that downloads pages of Gitee repository-search results
 * for the query "Spark" (100 entries per page, descending order) and stores the
 * body of every page as a JSON file on the local disk.
 *
 * <p>The class is marked {@code @Deprecated}; this file does not name a replacement.
 *
 * <p>Created: 2022-11-15 21:27
 *
 * @author Wenjie Mao
 */
@Deprecated
public class crawlJson {

    /** First page requested when the crawler is started through {@link #main(String[])}. */
    private static final int FIRST_PAGE = 61;

    /** Path prefix of the output files; the page number and {@code .json} are appended. */
    private static final String OUTPUT_FILE_PREFIX =
            "E:\\Android\\cloud-computing-nju2022\\java\\mao\\src\\newJsonSpark\\SparkRepo_Page";

    /**
     * Crawls consecutive result pages, starting at {@link #FIRST_PAGE}.
     *
     * <p>The loop ends as soon as a page cannot be downloaded or stored, or when a
     * page comes back without results, so a failing endpoint is not requested
     * endlessly.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        crawlJson crawler = new crawlJson();
        int pageIndex = FIRST_PAGE;
        boolean morePages = true;
        while (morePages) {
            System.out.println("crawling page" + pageIndex + "...");
            // Progress message only; no delay is applied between requests.
            System.out.println("waiting...");
            morePages = crawler.crawlPage(pageIndex);
            pageIndex++;
        }
    }

    /**
     * Downloads one page of search results and writes it to
     * {@code SparkRepo_Page<pageIndex>.json}.
     *
     * <p>I/O failures are reported on standard error and are not propagated.
     *
     * @param pageIndex page number passed to the search API
     */
    public void webToJson(int pageIndex) {
        crawlPage(pageIndex);
    }

    /**
     * Downloads and stores a single page.
     *
     * @param pageIndex page number passed to the search API
     * @return {@code true} if the page was stored and contained results,
     *         {@code false} if the download or the write failed or the page was empty
     */
    private boolean crawlPage(int pageIndex) {
        String url = "https://gitee.com/api/v5/search/repositories?q=Spark&page=" + pageIndex + "&per_page=100&order=desc";

        String json;
        try {
            json = fetch(url);
        } catch (IOException e) {
            System.err.println("failed to download page " + pageIndex + " from " + url);
            e.printStackTrace();
            return false;
        }

        File target = new File(OUTPUT_FILE_PREFIX + pageIndex + ".json");
        // try-with-resources closes the stream on every path and never touches a
        // stream that was not opened.
        try (FileOutputStream out = new FileOutputStream(target)) {
            // Explicit UTF-8 so the file does not depend on the platform default charset.
            out.write(json.getBytes(StandardCharsets.UTF_8));
        } catch (IOException e) {
            System.err.println("failed to write " + target);
            e.printStackTrace();
            return false;
        }

        return hasResults(json);
    }

    /**
     * Requests {@code url} and returns the response body as text.
     *
     * <p>The body is read as-is instead of being parsed as an HTML document, so
     * characters such as {@code &} and {@code <} inside the JSON are not rewritten
     * as HTML entities.
     *
     * @param url address to request
     * @return the response body
     * @throws IOException if the request fails
     */
    private static String fetch(String url) throws IOException {
        Connection conn = Jsoup.connect(url)
                .ignoreContentType(true)
                // 0 lifts jsoup's default body-size cap so a large page is not cut off.
                .maxBodySize(0);

        conn.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
        conn.header("Accept-Encoding", "gzip, deflate, sdch");
        conn.header("Accept-Language", "zh-CN,zh;q=0.8");
        conn.header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36");

        return conn.execute().body();
    }

    /**
     * Tells whether a response body holds any search results.
     *
     * <p>NOTE(review): assumes the API answers with an empty JSON array ({@code []})
     * once the pages are exhausted - TODO confirm against the Gitee API.
     *
     * @param json response body
     * @return {@code false} for a blank body or an empty JSON array, {@code true} otherwise
     */
    private static boolean hasResults(String json) {
        String trimmed = json.trim();
        return !trimmed.isEmpty() && !"[]".equals(trimmed);
    }
}
